# Mount Google Drive in Colab so the csv data files used below can be read.
# Must be done each time session expires.
from google.colab import drive
drive.mount('/content/drive')
import networkx as nx
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from scipy import stats
import csv
# Fix the NumPy RNG seed so layouts/sampling that rely on it are reproducible.
np.random.seed(123456)
# Retrieve and save a copy of the karate_club_graph()
kc_g = nx.karate_club_graph()
# Show basic graph information
# NOTE(review): nx.info was removed in networkx 3.0 -- this script assumes networkx 2.x
print(nx.info(kc_g))
# help(nx.draw)
# nx.draw is a simple graph drawing function that utilizes Matplotlib
# scale up the figsize
plt.figure(figsize=(10,8))
nx.draw(kc_g, with_labels=True, node_color='lime')
# help(nx.draw_networkx)
# Draw graphs using nx.draw_networkx
plt.figure(figsize=(10,10))
nx.draw_networkx(kc_g, with_labels=True, node_color='red')
plt.figure(figsize=(10,10))
nx.draw_networkx(kc_g, pos = nx.spring_layout(kc_g), with_labels=True)
plt.figure(figsize=(10,10))
nx.draw_spring(kc_g, with_labels=True, node_color='gold')
plt.show()
plt.figure(figsize=(10,10))
g_layout = nx.circular_layout(kc_g)
nx.draw_networkx(kc_g, g_layout, with_labels=True, node_color='gold')
plt.figure(figsize=(10,10))
nx.draw_circular(kc_g, with_labels=True)
plt.show()
# nx.kamada_kawai is also a nice node positioning algorithm
plt.figure(figsize=(10,10))
g_layout = nx.kamada_kawai_layout(kc_g)
nx.draw_networkx(kc_g, g_layout, with_labels=True, node_color='gold')
# It is not practical to show all nodes and node information this way
print(kc_g.nodes)
kc_g.nodes('club')
kc_g.nodes('club')[0]
print("Node club\n")
# Print each node id with its 'club' attribute.
# FIX: the loop body below had lost its indentation when the notebook was
# flattened to a script, which made it an IndentationError; re-indented.
for v in kc_g:
    print(f"{v:4} {kc_g.nodes('club')[v]}")
print(f'Number of nodes: {kc_g.number_of_nodes()}, Number of edges: {kc_g.number_of_edges()}')
# It is not practical to show all of the edges this way
kc_g.edges
print('Graph edges:\n')
# One edge tuple per line
for e in kc_g.edges: print(e)
#help(nx.adj_matrix)
# The default storage format of adjacency matrices is the sparse matrix format
# NOTE(review): nx.adj_matrix was removed in networkx 3.0 (use nx.adjacency_matrix)
print(nx.adj_matrix(kc_g))
# Show an adjacency matrix in a dense matrix format
print(nx.adj_matrix(kc_g).todense())
# It is not practical to show all node degrees this way
kc_g.degree
kc_g.degree(0)
print("Node Degree\n")
# FIX: loop body re-indented (indentation lost in the flattened notebook)
for v in kc_g.nodes:
    print(f"{v:4} {kc_g.degree(v):6}")
# right-justified each column
# help(nx.degree_histogram)
nx.degree_histogram(kc_g)
# Frequency (# of nodes) of a degree - 0, 1, 2, up to 17
# Draw a bar graph of number of Node Degree Distribution
# create x and y values of each bar
y_axis = nx.degree_histogram(kc_g)
x_axis = list(range(0,len(y_axis)))
# Draw bars using y_axis and x_axis
plt.bar(x_axis, y_axis, color = 'purple')
# draw tick labels
plt.xticks(x_axis)
plt.yticks(list(range(0,max(y_axis)+1)))
# Add title and axis names
# NOTE(review): 'Distributation' is a typo in the runtime title string; kept as-is here
plt.title(f'Node Degree Distributation in {kc_g.name}')
plt.xlabel('Degree')
plt.ylabel('Frequency')
plt.show()
# Same distribution, normalized to fractions of nodes per degree value
degrees = nx.degree_histogram(kc_g)
y_axis = [i/sum(degrees) for i in degrees]
# x_axis = list(range(0,len(y_axis)))
plt.bar(x_axis, y_axis, color = 'purple')
# Draw axis labels
plt.xticks(x_axis)
plt.yticks([0.0,0.1,0.2,0.3,0.4,0.5])
# Add title and axis names
plt.title(f'Degree Percentage Distributation in {kc_g.name}')
plt.xlabel('Degree')
plt.ylabel('Percentage')
plt.ylim(0,0.5)
plt.show()
# Encode node degree twice: as color (via the colormap) and as node size
plt.figure(figsize=(10,9))
plt.title(f'Circular Plot of {kc_g.name} Showing Node Degrees via Node Colors and Sizes', fontsize = 14)
degree_color = [kc_g.degree(v) for v in kc_g]
degree_size = [100*kc_g.degree(v) for v in kc_g]
nx.draw_circular(kc_g, node_color = degree_color , node_size = (degree_size),font_color = 'r', with_labels=True, cmap = plt.cm.Greens)
# https://matplotlib.org/3.1.0/tutorials/colors/colormaps.html
plt.show()
#help(nx.shortest_path)
nx.shortest_path(kc_g,0,33)
nx.shortest_path_length(kc_g,0,33)
nx.shortest_path(kc_g,6,33)
# NOTE(review): the line below duplicates the previous one (notebook cell re-run)
nx.shortest_path(kc_g,6,33)
nx.shortest_path_length(kc_g,6,33)
# With only a source given, shortest_path returns paths to every node
nx.shortest_path(kc_g,33)
nx.shortest_path_length(kc_g,33)
nx.average_shortest_path_length(kc_g)
nx.eccentricity(kc_g)
nx.diameter(kc_g)
nx.radius(kc_g)
nx.periphery(kc_g)
nx.center(kc_g)
# Create a 4-node complete graph
G = nx.complete_graph(4)
nx.draw_circular(G, with_labels=True)
# help(nx.clustering)
nx.clustering(G)
nx.average_clustering(G)
nx.clustering(kc_g)
round(nx.average_clustering(kc_g),2)
# help(nx.read_weighted_edgelist)
# Load a weighted, undirected graph from a list of edges in a csv file
copur_sup_g = nx.read_weighted_edgelist('/content/drive/My Drive/Walmart_8_depts_support_edges.csv', delimiter =",")
copur_sup_g.name = 'Walmart Copurchase Department Support Graph'
plt.figure(figsize =(12, 9))
plt.title(copur_sup_g.name, fontsize=16, color= 'b')
nx.draw_circular(copur_sup_g, with_labels = True, node_color='gold',node_size=1000, font_size=12,font_color = 'b')
print(nx.info(copur_sup_g))
copur_sup_g.nodes
copur_sup_g.edges
print(copur_sup_g.edges.data('weight'))
# Print each weighted edge as (u, v, weight).
# FIX: loop bodies in this block re-indented (indentation lost in the
# flattened notebook made them IndentationErrors).
for (u, v, wt) in copur_sup_g.edges.data('weight'):
    print(f"({u}, {v}, {wt:.3})")
# adjacency matrix of a weighted graph shows weight values instead of zeros and ones
nx.adj_matrix(copur_sup_g).todense()
print("Node Degree\n")
for v in copur_sup_g.nodes:
    print(f'{v:25} {copur_sup_g.degree(v):6}')
# right-justified each column
nx.degree_histogram(copur_sup_g)
# Draw a bar graph of number of Node Degree Distribution
# create x and y values of each bar
y_axis = nx.degree_histogram(copur_sup_g)
x_axis = list(range(0,len(y_axis)))
# Draw bars using y_axis and x_axis
plt.bar(x_axis, y_axis, color = 'purple')
# draw tick labels
plt.xticks(x_axis)
plt.yticks(list(range(0,max(y_axis)+1)))
# Add title and axis names
plt.title(f'Node Degree Distributation in {copur_sup_g.name}')
plt.xlabel('Degree')
plt.ylabel('Frequency')
plt.show()
degrees = nx.degree_histogram(copur_sup_g)
y_axis = [i/sum(degrees) for i in degrees]
# x_axis = list(range(0,len(y_axis)))
plt.bar(x_axis, y_axis, color = 'purple')
# Draw axis labels
plt.xticks(x_axis)
plt.yticks([0.0,0.1,0.2,0.3,0.4,0.5])
# Add title and axis names
plt.title(f'Degree Percentage Distributation in {copur_sup_g.name}')
plt.xlabel('Degree')
plt.ylabel('Percentage')
plt.ylim(0,0.5)
plt.show()
plt.figure(figsize=(10,10))
plt.title(f'Circular Plot of {copur_sup_g.name} Showing Weights via edge_width', fontsize = 14)
degree_color = [copur_sup_g.degree(v) for v in copur_sup_g]
degree_size = [200*copur_sup_g.degree(v) for v in copur_sup_g]
edge_width = [50*(copur_sup_g[u][v]['weight']) for u,v in copur_sup_g.edges]
nx.draw_circular(copur_sup_g, width=edge_width,edge_color = 'orange',node_color = degree_color , node_size = (degree_size), with_labels=True, cmap = plt.cm.Oranges)
plt.show()
nx.shortest_path(copur_sup_g)
# Hand-computed average shortest path length over all ordered node pairs,
# cross-checked against nx.average_shortest_path_length below.
path_ct = 0
sum_shortest_len = 0
for v1 in copur_sup_g.nodes:
    for v2 in copur_sup_g.nodes:
        if v1 != v2:
            path_ct +=1
            sum_shortest_len +=nx.shortest_path_length(copur_sup_g,v1,v2)
            print(f"({v1}, {v2}, {nx.shortest_path_length(copur_sup_g,v1,v2)})")
print('average shortest path length = ', sum_shortest_len/path_ct)
nx.average_shortest_path_length(copur_sup_g)
nx.eccentricity(copur_sup_g)
nx.diameter(copur_sup_g)
nx.radius(copur_sup_g)
nx.periphery(copur_sup_g)
nx.center(copur_sup_g)
nx.clustering(copur_sup_g)
nx.average_clustering(copur_sup_g)
# Load a graph from an input csv file; confidence is asymmetric, so build a DiGraph
copur_conf_g = nx.read_weighted_edgelist('/content/drive/My Drive/Walmart_8_depts_confidence_edges.csv', delimiter =",",create_using=nx.DiGraph)
copur_conf_g.name = 'Walmart Copurchase Department Confidence Graph'
# g_layout = nx.circular_layout(copur_conf_g)
plt.figure(figsize =(12, 9))
plt.title(copur_conf_g.name, fontsize=16)
nx.draw_circular(copur_conf_g, with_labels = True, node_color='gold',node_size=1000, font_size=12, edge_color = 'b')
print(nx.info(copur_conf_g))
copur_conf_g.nodes
copur_conf_g.edges
# FIX: loop bodies in this block re-indented (indentation lost in the flattened notebook)
for (u, v, wt) in copur_conf_g.edges.data('weight'):
    print(f"({u}, {v}, {wt:.3})")
nx.adj_matrix(copur_conf_g).todense()
plt.figure(figsize=(10,10))
# BUG FIX: the title referenced copur_sup_g.name although this figure plots
# copur_conf_g; use the confidence graph's own name.
plt.title(f'Circular Plot of {copur_conf_g.name} Showing Weights via edge_width', fontsize = 14)
degree_color = [copur_conf_g.degree(v) for v in copur_conf_g]
degree_size = [200*copur_conf_g.degree(v) for v in copur_conf_g]
edge_width = [10*(copur_conf_g[u][v]['weight']) for u,v in copur_conf_g.edges]
nx.draw_circular(copur_conf_g, width=edge_width,edge_color = 'orange',node_color = degree_color , node_size = (degree_size), with_labels=True, cmap = plt.cm.Oranges)
plt.show()
print("Node OutDegree InDegree Degree\n")
for v in copur_sup_g.nodes:
    print(f'{v:26}{copur_conf_g.out_degree(v):9}{copur_conf_g.in_degree(v):9}{copur_conf_g.degree(v):7}')
# right-justified each column
nx.degree_histogram(copur_conf_g)
# The same as copur_sup_g. No existing functions to generate distribution of in and out degrees.
# Draw a bar graph of number of Node Degree Distribution
# create x and y values of each bar
y_axis = nx.degree_histogram(copur_conf_g)
x_axis = list(range(0,len(y_axis)))
# Draw bars using y_axis and x_axis
plt.bar(x_axis, y_axis, color = 'purple')
# draw tick labels
plt.xticks(x_axis)
plt.yticks(list(range(0,max(y_axis)+1)))
# Add title and axis names
plt.title(f'Node Degree Distributation in {copur_conf_g.name}')
plt.xlabel('Degree')
plt.ylabel('Frequency')
plt.show()
degrees = nx.degree_histogram(copur_conf_g)
y_axis = [i/sum(degrees) for i in degrees]
# x_axis = list(range(0,len(y_axis)))
plt.bar(x_axis, y_axis, color = 'purple')
# Draw axis labels
plt.xticks(x_axis)
plt.yticks([0.0,0.1,0.2,0.3,0.4,0.5])
# Add title and axis names
plt.title(f'Degree Percentage Distributation in {copur_conf_g.name}')
plt.xlabel('Degree')
plt.ylabel('Percentage')
plt.ylim(0,0.5)
plt.show()
nx.shortest_path(copur_conf_g)
nx.shortest_path(copur_conf_g,source='MEAT - FRESH & FROZEN')
nx.shortest_path_length(copur_conf_g,'MEAT - FRESH & FROZEN', 'PRODUCE')
nx.shortest_path(copur_conf_g,source='DAIRY')
nx.shortest_path(copur_conf_g,target='DAIRY')
nx.average_shortest_path_length(copur_conf_g)
# Ref: https://en.wikipedia.org/wiki/Strongly_connected_component
# FIX: the reference above was a bare markdown fragment that made the script
# a SyntaxError; it is now a comment.
nx.is_strongly_connected(copur_conf_g)
nx.clustering(copur_conf_g)
nx.average_clustering(copur_conf_g)
# Load the sampled Facebook friendship edge list (undirected, weighted)
FBGraph = nx.read_weighted_edgelist('/content/drive/My Drive//FacebookEdges_limited_A7S2.csv', delimiter =",")
FBGraph.name = 'Facebook Graph of Randomly Sampled Users'
print(nx.info(FBGraph))
plt.figure(figsize=(25,25))
# g_layout = nx.kamada_kawai_layout(FBGraph)
# nx.draw_networkx(FBGraph,g_layout, with_labels = True)
# nx.draw_circular(FBGraph, with_labels = True, font_size=14)
nx.draw_networkx(FBGraph, with_labels = True)
# plt.axis('off')
# plt.tight_layout()
plt.show()
# Attach per-node attributes (name, gender, age, country) from the node csv.
# FIX: the with/for/if bodies below re-indented (indentation lost in the
# flattened notebook made them IndentationErrors).
with open('/content/drive/My Drive/FacebookNodes_limited_A7S2.csv') as csvfile:
    nodereader = csv.reader(csvfile, delimiter=',')
    for row in nodereader:
        if(row[0] in FBGraph.nodes):
            FBGraph.nodes[row[0]]['name'] = row[1]
            FBGraph.nodes[row[0]]['gender'] = row[2]
            FBGraph.nodes[row[0]]['age'] = int(row[3])
            FBGraph.nodes[row[0]]['country'] = row[4]
FBGraph.nodes.data('gender')
# Relabel nodes by their 'name' attribute (duplicate names collapse nodes -- see below)
FB_name_Graph = nx.relabel_nodes(FBGraph, nx.get_node_attributes(FBGraph, 'name'))
plt.figure(figsize=(25,25))
nx.draw_networkx(FB_name_Graph, with_labels = True)
# plt.axis('off')
plt.tight_layout()
plt.show()
plt.figure(figsize=(25,25))
nx.draw_circular(FB_name_Graph, with_labels = True)
# plt.axis('off')
# plt.tight_layout()
plt.show()
print(nx.info(FB_name_Graph))
# As we will see later, FB_name_Graph collapsed different nodes with the same names in FBGraph
# To avoid loss of information, we continue to use FBGraph
# Use set to get unique countries only
set(nx.get_node_attributes(FBGraph, 'country').values())
colorMap = {'Belgium':'black', 'Germany':'gold','Canada':'r', 'Costa Rica':'brown','Japan':'maroon', 'Now Coalinga':'purple',
            'Mexico':'olive','Philippines':'blue','Poland':'yellow','Singapore':'orange', 'United Kingdom':'green', 'United States':'royalblue'}
node_color = [colorMap[FBGraph.nodes[v]['country']] for v in FBGraph.nodes()]
plt.figure(figsize=(10,10))
nx.draw(FBGraph, with_labels = False, node_color=node_color)
nx.adj_matrix(FBGraph).todense()
# Draw a bar graph of number of Node Degree Distribution
# create x and y values of each bar
y_axis = nx.degree_histogram(FBGraph)
x_axis = list(range(0,len(y_axis)))
# Draw bars using y_axis and x_axis
plt.bar(x_axis, y_axis, color = 'purple')
# draw tick labels
plt.xticks(x_axis)
plt.yticks(list(range(0,max(y_axis)+1)))
# Add title and axis names
plt.title(f'Node Degree Distributation in {FBGraph.name}')
plt.xlabel('Degree')
plt.ylabel('Frequency')
plt.show()
# Same distribution, normalized to fractions of nodes per degree value
degrees = nx.degree_histogram(FBGraph)
y_axis = [i/sum(degrees) for i in degrees]
# x_axis = list(range(0,len(y_axis)))
plt.bar(x_axis, y_axis, color = 'purple')
# Draw axis labels
plt.xticks(x_axis)
plt.yticks([0.00,0.05,0.10,0.15,0.20,0.25])
# Add title and axis names
plt.title(f'Degree Percentage Distributation in {FBGraph.name}')
plt.xlabel('Degree')
plt.ylabel('Percentage')
plt.ylim(0,0.25)
plt.show()
# Node labels are the string ids read from the edge csv, hence '99', '163', ...
nx.shortest_path(FBGraph,'99','163')
nx.shortest_path(FBGraph,'99','471')
nx.shortest_path_length(FBGraph,source='99')
nx.average_shortest_path_length(FBGraph)
nx.diameter(FBGraph)
nx.radius(FBGraph)
nx.periphery(FBGraph)
nx.center(FBGraph)
nx.average_clustering(FBGraph)
# Collect FBGraph node attributes and per-node graph metrics into a data frame
df = pd.DataFrame(index=FBGraph.nodes())
df['name'] = pd.Series(nx.get_node_attributes(FBGraph, 'name'))
df['gender'] = pd.Series(nx.get_node_attributes(FBGraph, 'gender'))
df['age'] = pd.Series(nx.get_node_attributes(FBGraph, 'age'))
df['country'] = pd.Series(nx.get_node_attributes(FBGraph, 'country'))
df['degree'] = pd.Series(dict(FBGraph.degree()))
df['eccentricity'] = pd.Series(nx.eccentricity(FBGraph))
df['clustering'] = pd.Series(nx.clustering(FBGraph))
df.head(20)
# Create another data frame from FB_name_Graph for comparison
df1 = pd.DataFrame(index=FB_name_Graph.nodes())
df1['name'] = pd.Series(nx.get_node_attributes(FB_name_Graph, 'name'))
df1['gender'] = pd.Series(nx.get_node_attributes(FB_name_Graph, 'gender'))
df1['age'] = pd.Series(nx.get_node_attributes(FB_name_Graph, 'age'))
df1['country'] = pd.Series(nx.get_node_attributes(FB_name_Graph, 'country'))
df1['degree'] = pd.Series(dict(FB_name_Graph.degree()))
df1['eccentricity'] = pd.Series(nx.eccentricity(FB_name_Graph))
df1['clustering'] = pd.Series(nx.clustering(FB_name_Graph))
df1
# Show duplicated users
df[df.name.duplicated()]
# 'Hamtaro' appears more than once in df but only once in df1 (names collapsed)
df[df['name'].str.match('Hamtaro')]
df1[df1['name'].str.match('Hamtaro')]
df.degree.sort_values(ascending=False)
# For comparison, you can see the changes in degree count between the two FB graphs
# Continue with df
df1.degree.sort_values(ascending=False)
df.clustering.sort_values(ascending=False)
df.eccentricity.sort_values(ascending=False)
df.age.sort_values(ascending=False)
# help(nx.degree_centrality)
# G is a complete graph
G.degree
nx.degree_centrality(G)
# Each node in a complete graph has the maximum # of degrees
# help(nx.closeness_centrality)
nx.closeness_centrality(G)
# The average distance to other nodes and the inverse of this average is 1
# help(nx.betweenness_centrality)
nx.betweenness_centrality(G)
# Each pair of other nodes in a complete graph are connected to each other.
# None of their shortest paths for other pairs of nodes pass through a focal node.
# help(nx.edge_betweenness_centrality)
nx.edge_betweenness_centrality(G)
# In a complete graph, the lowest possible edge_betweeness is 1/6,
# because the total number of shortest paths, 6, is also the total number of edges.
# Small undirected 4-node example graph with a cycle
u_g = nx.Graph()
u_g.add_node(0)
u_g.add_edge(0, 1)
u_g.add_edge(1, 2)
u_g.add_edge(2, 3)
u_g.add_edges_from([(2,0),(3, 1)])
nx.draw(u_g, with_labels=True, node_color='brown',font_color='white')
# Directed version with the same edge pairs, for contrast
d_g = nx.DiGraph()
d_g.add_node(0)
d_g.add_edge(0, 1)
d_g.add_edge(1, 2)
d_g.add_edge(2, 3)
d_g.add_edges_from([(2, 0), (3, 1)])
nx.draw(d_g, with_labels=True, node_color='brown',font_color='white')
print(u_g.degree,'\n',d_g.degree,'\n')
print(nx.degree_centrality(u_g),'\n',nx.degree_centrality(d_g))
print(list(nx.shortest_path_length(u_g)),'\n',list(nx.shortest_path_length(d_g)),'\n')
# In u_g, average shortest path lengths to other nodes is 1 for nodes 1 and 2
# In u_g, average shortest path lengths to other nodes is 4/3 for nodes 0 and 3
# In d_g, average shortest path lengths to other nodes: 4/3 for node 1, 3/2 for 2
# In d_g, average shortest path lengths to other nodes is 2 for nodes 0 and 3
print(nx.closeness_centrality(u_g),'\n',nx.closeness_centrality(d_g))
print(nx.betweenness_centrality(u_g),'\n',nx.betweenness_centrality(d_g))
nx.edge_betweenness_centrality(u_g)
# Node betweenness may affect edge_betweenness
nx.edge_betweenness_centrality(d_g)
# Node betweenness may affect edge_betweenness
# Per-node metric data frame for the karate club graph
kc_df = pd.DataFrame(index=kc_g.nodes())
kc_df['club'] = pd.Series(nx.get_node_attributes(kc_g, 'club'))
kc_df['degree'] = pd.Series(dict(kc_g.degree()))
kc_df['eccentricity'] = pd.Series(nx.eccentricity(kc_g))
kc_df['clustering'] = pd.Series(nx.clustering(kc_g))
kc_df['degree_centrality'] = pd.Series(nx.degree_centrality(kc_g))
kc_df['closeness'] = pd.Series(nx.closeness_centrality(kc_g))
kc_df['betweenness'] = pd.Series(nx.betweenness_centrality(kc_g))
kc_df
nx.edge_betweenness_centrality(kc_g)
# Drop the non-numeric 'club' column before computing correlations
kc_df.drop('club',axis=1).corr()
# Per-node metric data frame for the Walmart support graph
copur_sup_df = pd.DataFrame(index=copur_sup_g.nodes())
copur_sup_df['degree'] = pd.Series(dict(copur_sup_g.degree()))
copur_sup_df['eccentricity'] = pd.Series(nx.eccentricity(copur_sup_g))
copur_sup_df['clustering'] = pd.Series(nx.clustering(copur_sup_g))
copur_sup_df['degree_centrality'] = pd.Series(nx.degree_centrality(copur_sup_g))
copur_sup_df['closeness'] = pd.Series(nx.closeness_centrality(copur_sup_g))
copur_sup_df['betweenness'] = pd.Series(nx.betweenness_centrality(copur_sup_g))
copur_sup_df
nx.edge_betweenness_centrality(copur_sup_g)
copur_sup_df.corr()
# Per-node metric data frame for the directed confidence graph
copur_conf_df = pd.DataFrame(index=copur_conf_g.nodes())
copur_conf_df['degree'] = pd.Series(dict(copur_conf_g.degree()))
copur_conf_df['out_degree'] = pd.Series(dict(copur_conf_g.out_degree()))
copur_conf_df['in_degree'] = pd.Series(dict(copur_conf_g.in_degree()))
# copur_conf_df['eccentricity'] = pd.Series(nx.eccentricity(copur_conf_g))
copur_conf_df['clustering'] = pd.Series(nx.clustering(copur_conf_g))
copur_conf_df['degree_cen'] = pd.Series(nx.degree_centrality(copur_conf_g))
copur_conf_df['out_degree_cen'] = pd.Series(nx.out_degree_centrality(copur_conf_g))
copur_conf_df['in_degree_cen'] = pd.Series(nx.in_degree_centrality(copur_conf_g))
copur_conf_df['closeness'] = pd.Series(nx.closeness_centrality(copur_conf_g))
copur_conf_df['betweenness'] = pd.Series(nx.betweenness_centrality(copur_conf_g))
copur_conf_df
nx.edge_betweenness_centrality(copur_conf_g)
copur_conf_df.corr()
# Per-node metric data frame for FBGraph
fb_df = pd.DataFrame(index=FBGraph.nodes())
# BUG FIX: the original assigned the 'name' column to df (the earlier frame)
# instead of fb_df, leaving fb_df without a name column.
fb_df['name'] = pd.Series(nx.get_node_attributes(FBGraph, 'name'))
fb_df['gender'] = pd.Series(nx.get_node_attributes(FBGraph, 'gender'))
fb_df['age'] = pd.Series(nx.get_node_attributes(FBGraph, 'age'))
fb_df['country'] = pd.Series(nx.get_node_attributes(FBGraph, 'country'))
fb_df['degree'] = pd.Series(dict(FBGraph.degree()))
fb_df['eccentricity'] = pd.Series(nx.eccentricity(FBGraph))
fb_df['clustering'] = pd.Series(nx.clustering(FBGraph))
fb_df['degree_cen'] = pd.Series(nx.degree_centrality(FBGraph))
fb_df['closeness'] = pd.Series(nx.closeness_centrality(FBGraph))
fb_df['betweenness'] = pd.Series(nx.betweenness_centrality(FBGraph))
# explore edge_betweenness_centrality of some of the edges if interested
print(fb_df.index)
fb_df
# Check out edge_betweenness_centrality of all edges of FBGraph if you are interested
# nx.edge_betweenness_centrality(FBGraph)
# Drop the non-numeric columns before computing correlations
fb_df.drop({'gender','country'},axis=1).corr()
# help(nx.common_neighbors)
list(nx.common_neighbors(G,0,1))
list(nx.common_neighbors(u_g,0,1))
s='pair'
print('In G:\n')
print(f'{s:6}|no of comm neighbors|list of comm neighbors\n')
# FIX: loop bodies in this block re-indented (indentation lost in the
# flattened notebook made them IndentationErrors).
for edge in G.edges:
    print(f'{edge[0]} - {edge[1]} |{len(list(nx.common_neighbors(G,edge[0],edge[1]))):20}| {list(nx.common_neighbors(G,edge[0],edge[1]))}')
print('\n In u_g:\n')
print(f'{s:6}|no of comm neighbors|list of comm neighbors\n')
for edge in u_g.edges:
    print(f'{edge[0]} - {edge[1]} |{len(list(nx.common_neighbors(u_g,edge[0],edge[1]))):20}| {list(nx.common_neighbors(u_g,edge[0],edge[1]))}')
# help(nx.common_neighbor_centrality)
# By default, common_neighbor_centrality reports edge attachment scores for edges that don't exist yet
list(nx.common_neighbor_centrality(G))
# Provide a list of edges in nx.common_neighbor_centrality()
list(nx.common_neighbor_centrality(G,G.edges))
print('In u_g:\n\n common_neighbor_centrality of non_edges\n',\
list(nx.common_neighbor_centrality(u_g,list(nx.non_edges(u_g)))),'\n')
print('common_neighbor_centrality of existing edges\n',\
list(nx.common_neighbor_centrality(u_g, u_g.edges)))
list(nx.common_neighbor_centrality(u_g, u_g.edges))
print('In copur_sup_G:\n')
print(f'{s:46}|no of comm neighbors|list of comm neighbors\n')
for edge in copur_sup_g.edges:
    print(f'{edge[0]:21} - {edge[1]:21} |{len(list(nx.common_neighbors(copur_sup_g,edge[0],edge[1]))):20}|{list(nx.common_neighbors(copur_sup_g,edge[0],edge[1]))}')
list(nx.common_neighbor_centrality(copur_sup_g,copur_sup_g.edges))
for edge in nx.non_edges(copur_sup_g):
    print(edge[0],edge[1],len(list(nx.common_neighbors(copur_sup_g,edge[0],edge[1]))))
list(nx.common_neighbor_centrality(copur_sup_g))
# help(nx.jaccard_coefficient)
list(nx.jaccard_coefficient(G))
list(nx.jaccard_coefficient(G, G.edges))
list(nx.jaccard_coefficient(u_g, u_g.edges))
list(nx.jaccard_coefficient(u_g))
list(nx.jaccard_coefficient(copur_sup_g, copur_sup_g.edges))
list(nx.jaccard_coefficient(copur_sup_g))
# Link-prediction features for karate-club node pairs WITHOUT an edge (label 0)
kc_no_df = pd.DataFrame(list(nx.common_neighbor_centrality(kc_g)),columns=['node1', 'node2','cn_cen'])
kc_no_df
preds = nx.jaccard_coefficient(kc_g)
preds_p = []
# FIX: the accumulation-loop bodies in this block re-indented (indentation
# lost in the flattened notebook made them IndentationErrors).
for u, v, p in preds:
    preds_p.append(p)
kc_no_df['jaccard'] = pd.Series(preds_p)
y_col = pd.Series(0, name='HasEdge').repeat(len(kc_no_df)).reset_index(drop=True)
kc_no_df = pd.concat([kc_no_df, y_col], axis=1)
kc_no_df
# Features for node pairs WITH an edge (label 1)
kc_yes_df = pd.DataFrame(list(nx.common_neighbor_centrality(kc_g,kc_g.edges)),columns=['node1', 'node2','cn_cen'])
preds = nx.jaccard_coefficient(kc_g, kc_g.edges)
preds_p = []
for u, v, p in preds:
    preds_p.append(p)
kc_yes_df['jaccard'] = pd.Series(preds_p)
y_col = pd.Series(1, name='HasEdge').repeat(len(kc_yes_df)).reset_index(drop=True)
kc_yes_df = pd.concat([kc_yes_df, y_col], axis=1)
kc_yes_df
kc_lp_df = pd.concat([kc_yes_df,kc_no_df]).reset_index(drop=True)
kc_lp_df
kc_lp_df.drop(['node1','node2'],axis=1).corr()
# common_neighbor_centrality is a reasonable edge attachment score to proxy link prediction likelihood
# Same construction for FBGraph: non-edges (label 0) ...
fb_no_df = pd.DataFrame(list(nx.common_neighbor_centrality(FBGraph)),columns=['node1', 'node2','cn_cen'])
preds = nx.jaccard_coefficient(FBGraph)
preds_p = []
for u, v, p in preds:
    preds_p.append(p)
fb_no_df['jaccard'] = pd.Series(preds_p)
y_col = pd.Series(0, name='HasEdge').repeat(len(fb_no_df)).reset_index(drop=True)
fb_no_df = pd.concat([fb_no_df, y_col], axis=1)
# ... and existing edges (label 1)
fb_yes_df = pd.DataFrame(list(nx.common_neighbor_centrality(FBGraph,FBGraph.edges)),columns=['node1', 'node2','cn_cen'])
preds = nx.jaccard_coefficient(FBGraph, FBGraph.edges)
preds_p = []
for u, v, p in preds:
    preds_p.append(p)
fb_yes_df['jaccard'] = pd.Series(preds_p)
y_col = pd.Series(1, name='HasEdge').repeat(len(fb_yes_df)).reset_index(drop=True)
fb_yes_df = pd.concat([fb_yes_df, y_col], axis=1)
fb_lp_df = pd.concat([fb_yes_df,fb_no_df]).reset_index(drop=True)
fb_lp_df
fb_lp_df.drop(['node1','node2'],axis=1).corr()
# common_neighbor_centrality is a reasonable edge attachment score to proxy link prediction likelihood
# Adding edge (5,6) creates a second, disconnected component in u_g
u_g.add_edge(5,6)
nx.draw(u_g, with_labels=True)
nx.is_connected(u_g)
list(nx.connected_components(u_g))
print(nx.is_connected(kc_g),nx.is_connected(copur_sup_g),nx.is_connected(FBGraph))
nx.is_strongly_connected(d_g)
nx.is_strongly_connected(copur_conf_g)
# Create a small graph example using add_edges_from to explore its cliques
c_g = nx.Graph()
edges = [(1,2), (1,6), (2,3), (2,4), (2,6), (3,4), (3,5), (1,3), (4,9), (6,7), (1,4), (9,10), (10,11), (9,11)]
c_g.add_edges_from(edges)
nx.draw(c_g, with_labels = True)
# help(nx.find_cliques)
list(nx.find_cliques(c_g))
# help(nx.community.k_clique_communities)
# Communities formed by cliques of size k, for k = 3, 4, 2
list(nx.community.k_clique_communities(c_g,3))
list(nx.community.k_clique_communities(c_g,4))
list(nx.community.k_clique_communities(c_g,2))
list(nx.find_cliques(kc_g))
list(nx.community.k_clique_communities(kc_g,4))
plt.figure(figsize=(8,8))
nx.draw(kc_g, with_labels=True)
list(nx.find_cliques(copur_sup_g))
nx.draw(copur_sup_g, with_labels=True)
list(nx.community.k_clique_communities(copur_sup_g,3))
list(nx.community.k_clique_communities(copur_sup_g,4))
# list(nx.find_cliques(FBGraph))
# Run this code if you are interested in inspecting cliques in FBGraph
list(nx.community.k_clique_communities(FBGraph,3))
list(nx.community.k_clique_communities(FBGraph,4))
# help(nx.node_clique_number)
nx.node_clique_number(G)
nx.node_clique_number(u_g)
nx.node_clique_number(kc_g)
nx.node_clique_number(copur_sup_g)
nx.node_clique_number(FBGraph)
# Ranges from 2 to 4
# Two complete 4-node graphs with different labels are still isomorphic
G4 = nx.complete_graph(4)
G4 = nx.relabel_nodes(G4, {0:'a',1:'b',2:'c',3:'d'})
nx.draw(G4,with_labels=True)
nx.is_isomorphic(G,G4)
# help(nx.graph_edit_distance)
nx.graph_edit_distance(G4,G)
G5=nx.complete_graph(5)
nx.draw(G5,with_labels=True)
nx.graph_edit_distance(G4,G5)
G4.edges
G5.edges
nx.graph_edit_distance(G,G5)
# Remove the extra component added above before comparing u_g to other graphs
u_g.remove_edge(5,6)
u_g.remove_node(5)
u_g.remove_node(6)
nx.draw(u_g,with_labels=True)
nx.graph_edit_distance(G4,u_g)
nx.graph_edit_distance(G5,u_g)
nx.is_isomorphic(G,u_g,edge_match=None,node_match=None)
import networkx.algorithms.bipartite as bipartite
# help(bipartite.is_bipartite)
# A simple example
b_g = nx.Graph()
b_g.add_nodes_from([1,2,3,4])
b_g.add_nodes_from(['a','b','c'])
b_g.add_edges_from([(1,'a'), (1,'b'), (2,'b'), (2,'c'), (3,'c'), (4,'a')])
print(nx.info(b_g),'\n')
print('The statement - b_g is a bipartitle graph is ',bipartite.is_bipartite(b_g),'\n')
nx.draw(b_g, with_labels = True)
def undirected_graph_metric_df(g):
    """Return a pandas DataFrame of per-node metrics for an undirected graph g.

    One row per node; columns: degree, eccentricity, clustering,
    degree_centrality, closeness, betweenness.
    """
    # FIX: the function body had lost its indentation when the notebook was
    # flattened to a script (IndentationError); re-indented, logic unchanged.
    g_df = pd.DataFrame(index=g.nodes())
    g_df['degree'] = pd.Series(dict(g.degree()))
    g_df['eccentricity'] = pd.Series(nx.eccentricity(g))
    g_df['clustering'] = pd.Series(nx.clustering(g))
    g_df['degree_centrality'] = pd.Series(nx.degree_centrality(g))
    g_df['closeness'] = pd.Series(nx.closeness_centrality(g))
    g_df['betweenness'] = pd.Series(nx.betweenness_centrality(g))
    return g_df
# Many of the undirected graph functions can be applied to nodes of two types
b_df = undirected_graph_metric_df(b_g)
b_df
#help(nx.bipartite.sets)
l, r = nx.bipartite.sets(b_g)
print('set l:',l,'\n')
print('set r:',r)
# help(bipartite.biadjacency_matrix)
print("Biadjacency matrix")
print(bipartite.biadjacency_matrix(b_g, l, r).todense())
# help(enumerate)
# FIX: loop bodies re-indented (indentation lost in the flattened notebook)
for set_index, node_label in enumerate(l):
    print(set_index,node_label)
for set_index, node_label in enumerate(r):
    print(set_index,node_label)
# create a customized node layout dictionary - pos1
pos1 = {}
# Update position for drawing with a bipartite group number
# Use update to update the content of pos1
pos1.update((node_label, (1,set_index)) for set_index, node_label in enumerate(l))
pos1.update((node_label, (2,set_index)) for set_index, node_label in enumerate(r))
pos1
# Align drawing of nodes by node type using the customized node layout dictionary
nx.draw(b_g, pos=pos1, with_labels = True)
# Classic Davis Southern Women bipartite graph (women x social events/clubs)
davis_g = nx.davis_southern_women_graph()
print(nx.info(davis_g),'\n')
print('The statement - davis_g is a bipartitle graph is ',bipartite.is_bipartite(davis_g),'\n')
plt.figure(figsize=(12,12))
nx.draw_circular(davis_g, with_labels=True)
dvs_df = undirected_graph_metric_df(davis_g)
dvs_df
women_set, clubs_set = nx.bipartite.sets(davis_g)
women_set
clubs_set
print(bipartite.biadjacency_matrix(davis_g, women_set, clubs_set).todense())
# print degree information
h1 = 'Member'
h2 = 'Number of Clubs'
print(f'{h1:25} {h2:10}\n')
# FIX: loop bodies re-indented (indentation lost in the flattened notebook)
for w in women_set:
    print(f"{w:25} {davis_g.degree(w):10}")
h3 = 'Club'
h4 = 'Number of Members'
print('\n', f'{h3:25} {h4:10}\n')
for w in clubs_set:
    print(f"{w:25} {davis_g.degree(w):10}")
pos_davis = {}
# Update position for node from each group
pos_davis.update((node, (1, index)) for index, node in enumerate(clubs_set))
pos_davis.update((node, (2, index)) for index, node in enumerate(women_set))
plt.figure(figsize=(10,10))
nx.draw(davis_g, pos = pos_davis, with_labels=True)
plt.show()
# help(bipartite.projected_graph)
# One-mode projection onto the women; two women are linked if they share a club
women_g = bipartite.projected_graph(davis_g, women_set)
print(nx.info(women_g),'\n')
print('The statement - women_g is a bipartitle graph is ',bipartite.is_bipartite(women_g),'\n')
plt.figure(figsize=(10,10))
nx.draw_circular(women_g, with_labels=True)
women_df = undirected_graph_metric_df(women_g)
women_df
women_df.corr()
# One-mode projection onto the clubs
clubs_g = bipartite.projected_graph(davis_g, clubs_set)
print(nx.info(clubs_g),'\n')
print('The statement - clubs_g is a bipartitle graph is ',bipartite.is_bipartite(clubs_g),'\n')
plt.figure(figsize=(6,6))
nx.draw_circular(clubs_g, with_labels=True)
clubs_df = undirected_graph_metric_df(clubs_g)
clubs_df
clubs_df.corr()
# help(bipartite.weighted_projected_graph)
# Weighted projection: edge weight = number of shared clubs/members
women_wg = bipartite.weighted_projected_graph(davis_g, women_set)
print(nx.info(women_wg),'\n')
print('The statement - women_wg is a bipartitle graph is ',bipartite.is_bipartite(women_wg),'\n')
plt.figure(figsize=(10,10))
comm_clubs =[(women_wg[u][v]['weight']) for u,v in women_wg.edges]
nx.draw_circular(women_wg, width=comm_clubs,edge_color = 'orange',with_labels=True)
women_w_edge_df = pd.DataFrame(list(women_wg.edges.data('weight')))
women_w_edge_df
clubs_wg = bipartite.weighted_projected_graph(davis_g, clubs_set)
print(nx.info(clubs_wg),'\n')
print('The statement - clubs_wg is a bipartitle graph is ',bipartite.is_bipartite(clubs_wg),'\n')
plt.figure(figsize=(6,6))
comm_members =[clubs_wg[u][v]['weight'] for u,v in clubs_wg.edges]
nx.draw_circular(clubs_wg, width=comm_members,edge_color = 'purple',with_labels=True)
clubs_w_edge_df = pd.DataFrame(list(clubs_wg.edges.data('weight')))
clubs_w_edge_df
# help(nx.watts_strogatz_graph)
# Small-world graphs: watts_strogatz_graph(n, k, p) -- n nodes, each joined
# to k nearest ring neighbors, edges rewired with probability p.
# p = 0.0 gives a pure ring lattice
sw_g1 = nx.watts_strogatz_graph(100,2,0.0)
print(nx.info(sw_g1))
plt.figure(figsize=(10,10))
nx.draw_circular(sw_g1)
# p = 0.2: some shortcuts rewired across the ring
sw_g2 = nx.watts_strogatz_graph(100,8,0.2)
print(nx.info(sw_g2))
plt.figure(figsize=(10,10))
nx.draw_circular(sw_g2)
sw_df2 = undirected_graph_metric_df(sw_g2)
sw_df2
sw_df2.corr()
# p = 0.8: heavily rewired, approaching a random graph
sw_g3 = nx.watts_strogatz_graph(100,8,0.8)
print(nx.info(sw_g3))
plt.figure(figsize=(10,10))
nx.draw_circular(sw_g3)
sw_df3 = undirected_graph_metric_df(sw_g3)
sw_df3
sw_df3.corr()
# p = 0.4: intermediate rewiring
sw_g4 = nx.watts_strogatz_graph(100,8,0.4)
print(nx.info(sw_g4))
plt.figure(figsize=(10,10))
nx.draw_circular(sw_g4)
sw_df4 = undirected_graph_metric_df(sw_g4)
sw_df4
sw_df4.corr()
A real world example can be social media follower- following relationship.The graph of the relationships (followed/follower) betweens the users, (so we need the relation_user data) but we are also interested about the credibility of each user (so we need the newsUser and labels_training sets).
The resource for this example is: https://www.kaggle.com/adoury/web-mining-project-web-mining-analysis
# Enter and run the following lines of commands in your notebook to generate
# an hmtl file of your notebook with NbConvertApp
# copy the ipynb to the local working directory using ! to run a shell command,
# cp, to copy my notebook in a folder named "Colab Notebooks"
# to a local working directory referenced via ./
!cp "/content/drive/My Drive/Colab Notebooks/Copy of Graph Mining.ipynb" ./
# run the second shell command, jupyter nbconvert --to html "file name of the notebook"
# create html from ipynb
!jupyter nbconvert --to html "Graph Mining.ipynb"